import scvelo as scv
import numpy as np
import pandas as pd
import scipy
import scanpy as sc
import os
sc.settings.vector_friendly = False
# scv.set_figure_params( dpi=300, dpi_save=300, frameon=False, figsize=(4, 4), format='png', fontsize=16)
scv.set_figure_params(figsize=(2, 5))
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')
treatment = None
adata_file = 'data/object/scvelo_all.h5ad'
adata = sc.read_h5ad('data/object/velocyto.h5ad')
obs = pd.read_csv('data/object/int/meta/meta.csv', index_col=0)
obsm = pd.read_csv('data/object/int/reductions/X_umap/reduction.csv', index_col=0)
population_names = ['Ery (1)', 'Ery (2)', 'Ery (3)', 'Ery (4)', 'Ery (5)', 'Ery (6)']
celltype_colours = [
'#FC9272',
'#FB6A4A',
'#EF3B2C',
'#CB181D',
'#A50F15',
'#67000D'
]
# Filter obs by Ery annotation and treatment
obs = obs[obs['leiden_annotation'].isin(population_names)]
if treatment is None:
obs = obs
else:
obs = obs[obs['treatment'] == treatment]
# Filter obsm by cell index
obsm = obsm[obsm.index.isin(obs.index)]
# Filter velocity adata by obs
adata = adata[adata.obs.index.isin(obs.index)]
# Order index to match velocity adata
obs = obs.reindex(adata.obs.index)
obsm = obsm.reindex(adata.obs.index)
adata.obs = obs
adata.obsm['X_umap'] = obsm.to_numpy()
adata.uns['leiden_annotation_colors'] = celltype_colours
sc.pl.umap(adata, color=['leiden', 'tissue', 'treatment', 'leiden_annotation', 'sample_rep', 'cc_phase_class', 'pMt_RNA', 'pHb_RNA', 'pRb_RNA'], wspace=1, ncols=3)
... storing 'sample_name' as categorical ... storing 'sample_rep' as categorical ... storing 'tissue' as categorical ... storing 'treatment' as categorical ... storing 'sample_group' as categorical ... storing 'sample_path' as categorical ... storing 'sample_dir' as categorical ... storing 'cellranger_class' as categorical ... storing 'cc_phase_class' as categorical ... storing 'label_main_immgen' as categorical ... storing 'label_fine_immgen' as categorical ... storing 'label_main_haemosphere' as categorical ... storing 'label_fine_haemosphere' as categorical ... storing 'qc_class' as categorical ... storing 'doublet_cluster' as categorical ... storing 'doublet_class' as categorical ... storing 'doublet_class_int' as categorical ... storing 'leiden_annotation' as categorical
adata_temp = adata.copy()
# HVG on all data
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.05, min_disp=0.1)
hvg_1 = adata.var_names[adata.var.highly_variable].tolist()
# HVG on scvelo filtered data
adata = adata_temp.copy()
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
hvg_2 = adata.var_names.tolist()
Filtered out 26253 genes that are detected 20 counts (shared). Normalized count data: X, spliced, unspliced. Extracted 2000 highly variable genes. Logarithmized X.
hvg = list(set(hvg_1) | set(hvg_2))
adata = adata_temp.copy()
adata = adata[:,hvg]
scv.pp.filter_and_normalize(adata)
scv.pp.moments(adata, n_neighbors=50)
Trying to set attribute `.obs` of view, copying.
Normalized count data: X, spliced, unspliced.
Logarithmized X.
computing neighbors
finished (0:00:56) --> added
'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
finished (0:00:07) --> added
'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
scv.tl.recover_dynamics(adata)
recovering dynamics (using 1/32 cores)
finished (0:32:15) --> added
'fit_pars', fitted parameters for splicing dynamics (adata.var)
scv.tl.velocity(adata,mode='dynamical')
scv.tl.velocity_graph(adata)
computing velocities
finished (0:00:14) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph (using 1/32 cores)
finished (0:01:21) --> added
'velocity_graph', sparse matrix with cosine correlations (adata.uns)
scv.tl.latent_time(adata)
computing terminal states
WARNING: Uncertain or fuzzy root cell identification. Please verify.
identified 3 regions of root cells and 1 region of end points .
finished (0:00:07) --> added
'root_cells', root cells of Markov diffusion process (adata.obs)
'end_points', end points of Markov diffusion process (adata.obs)
computing latent time using root_cells as prior
finished (0:00:08) --> added
'latent_time', shared time (adata.obs)
scv.pl.velocity_embedding_stream(adata, basis='X_umap', color=['leiden_annotation'])
computing velocity embedding
finished (0:00:08) --> added
'velocity_umap', embedded velocity vectors (adata.obsm)
sc.pl.umap(adata, color=['root_cells', 'end_points', 'velocity_pseudotime', 'latent_time'])
adata.write_h5ad(adata_file)